#!/usr/bin/env python
# -*- coding:utf-8 -*-

import scrapy
from mySpider.items import ItcastItem


# Spider that scrapes the itcast.cn teacher listing page
class ItcastSpider(scrapy.Spider):
    """Scrape teacher name/title/description entries from the itcast.cn
    teacher page and yield one ``ItcastItem`` per teacher.
    """

    name = "itcast"
    # allowed_domains takes bare domain names, not URLs — a URL here makes
    # the OffsiteMiddleware reject every followed request as off-site.
    allowed_domains = ["itcast.cn"]
    start_urls = ["http://www.itcast.cn/channel/teacher.shtml#"]

    def parse(self, response):
        """Parse the teacher listing page.

        Each teacher entry is a ``<div class="li_txt">`` holding
        ``<h3>`` (name), ``<h4>`` (title) and ``<p>`` (description).
        Yields populated ``ItcastItem`` objects for the item pipelines.
        """
        for teacher in response.xpath('//div[@class="li_txt"]'):
            item = ItcastItem()
            # extract_first(default='') returns a plain unicode str and never
            # raises IndexError when a child element is missing, unlike
            # extract()[0].  Keep text as str: the old
            # .encode('gbk', 'ignore') silently dropped any character not
            # representable in GBK — encoding belongs in the exporter/pipeline.
            item['name'] = teacher.xpath('./h3/text()').extract_first(default='')
            item['title'] = teacher.xpath('./h4/text()').extract_first(default='')
            item['info'] = teacher.xpath('./p/text()').extract_first(default='')

            # Hand the item to the configured item pipelines.
            yield item
